{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.6 Q学習で迷路を攻略"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 使用するパッケージの宣言\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUQAAAEzCAYAAABJzXq/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAk9UlEQVR4nO3deXRUZZ7/8fc3G1tA+AkiBFkcVDrYtprAgPZxARxRB+0W8SdMq4CCCy6/Fo/dzjja2u0cN/Row6g5DbiOu9jgqG0UxFZUOiiiLK2MDULAIUT2kKXI9/dHXegQE6oSUrlVxed1Th2q6j731jcP4cNzt6fM3REREcgIuwARkWShQBQRCSgQRUQCCkQRkYACUUQkoEAUEQnEDEQza2tmi83sczNbbmZ3NtCmjZm9YGarzewTM+ubkGpFRBIonhFiFTDM3X8CnAiMNLMh9dpcAWxx9/7AQ8C9sTZqZpObWGvSSNXaU7VuSN3aU7VuSN3aD6bumIHoUTuDl9nBo/7V3BcATwbPXwaGm5nF2HRKdnYgVWtP1bohdWtP1bohdWtPXCACmFmmmS0FNgHF7v5JvSZ5wDoAd48A24DDm1uUiEgYrCm37plZZ2AOcL27f1nn/S+Bke6+Pnj9P8A/uvvmeutPJkjvNm3aFBx//PEH/QOEoaysjG7duoVdRpOlat2QurWnat2QurUvWbKkCviyzltF7l4Uz7pZTfkgd99qZguAkfU+sBQ4ClhvZlnAYUB5A+sXAUUAhYWFXlJS0pSPFxGJycy+dPfC5qwbz1nmbsHIEDNrB5wFrKrXbC5wefD8ImC+a9YIEUkx8YwQewBPmlkm0QB90d1fN7O7gBJ3nwvMBJ42s9XA98AlCatYRCRBYgaiuy8DTmrg/dvrPK8ExrRsaSIirUt3qoiIBBSIIiIBBaKISECBKCISUCCKiAQUiCIiAQWiiEhAgSgiElAgiogEFIgiIoEmzXZzqHB3SneUsmTDEhaXLmbh2oWsKFvB7shuIrUR9tTuITMjk6yMLNpltSO/Wz6n9zmdwXmDKehZQF7HPGLPjysiyUaBGKj1Wt795l0e/PhBPvz2QyK1EbIzs9lZvZNar/1B+0hthEhthMpIJR+u+5CP1n9Ebk4u1Xuqyc7I5tTep3LTkJsYfvRwMkwDcZFUcMgH4pbdW5j12SymfTSNHdU72Fm9c9+y3ZHdcW+n1mvZXrUdgEoqeWv1W3zw7Qd0zOnI1KFTmXjSRLq069Li9YtIy2nSjNktKewJYtdvX88txbcwZ9UcMiyDipqKhH1W++z21HotFw64kHvPupdenXol7LNEDnVmtiRhE8SmG3dn5mczGTB9AC8tf4nKSGVCwxCgoqaCykglLy5/kQHTBzDzs5lo/lyR5HNIBWLp9lLOfPJMbnzzRnbV7CLikVb9/IhH2FWzixvfvJEznzyT0u2lrfr5InJgh0wgzl46mwHTB/Dhug/ZVbMr1Fp21eziw3UfMmDGAGYvnR1qLSLyd2kfiO7OL9/6Jde9cR07a3YSqW3dUWFjIrURdlbv5Lo3ruOmP92kXWiRJJDWgbindg/jXxtP0adFCT9O2FwVNRU8vuRxJvxxAntq94RdjsghLW0vu3F3Jv5xIi+vfDlpw3CvipoKXlrxEgCzL5iti7pFQpK2I8Sb/nQTr6x8JenDcK+9oTj17alhlyJyyErLQJy9dDZFnxaFfvKkqfbuPutEi0g40i4QS7eXcsMbN6TMyLC+ipoKbnjzBl2SIxKCtApEd2fcq+Oo3FMZdikHpSpSxb+8+i868yzSytIqEGctncWSDUuS5tKa5qqpraFkQ4l2nUVaWdoE4vrt6/fdgZIOdtXs4sa3btSus0grSptAvKX4FqoiVWGX0aIqI5XcUnxL2GWIHDLSIhC37N7CnFVzWv3e5ESL1EZ4ddWrbNm9JexSRA4JaRGIsz6blbaTsGZYho4lirSSlE+RWq9l2kfTUvYym1gqaiqYtmhag7N2i0jLSvlAfPebd9lRvaPlN7wLeB14CPgtcD/wJPA/wXIHFgAPAL8DZgObWr4MgO3V25n/t/mJ2XgSKSsr49prr6Vv3760adOG7t27M3z4cIqLiwF49dVXOfvss+nWrRtmxnvvvRduwWngQH1eU1PDr371K0444QQ6dOhAjx49GDduHN9++23YZSdMyt/L/ODHD+437X+LeQGoAS4A/g/RgFwD7B2Ifgh8BPwMOBxYCDwFXA+0adlSdlbvZNpH0xhx9IiW3XCSGT16NBUVFcycOZP+/fuzadMmFi5cSHl5OQC7du3ilFNO4Re/+AWXXXZZyNWmhwP1eUVFBZ9++in/9m//xoknnsi2bduYOnUqI0eOZNmyZWRlpXx8/EBKf4WAu3PYPYe1/AhxN3AvcCnwDw19MDANGAycFrxXQ3QU+U9AsyYvP7BObTqx9Vdb03bih61bt9KlSxeKi4sZMeLAwb9582a6devGggULOOOMM1qnwDTUlD7fa8WKFQwcOJBly5bx4x//OMEVNs8h+xUCpTtKqamtafkN5wSPvxINuvq2ADvZPyyzgT7AupYvB6B6TzUbdmxIzMaTQG5uLrm5ucydO5fKytS+0yhVNKfPt2+PfpFaly7p+YVpKR2ISzYsISczp+U3nEl0V3gZcA/wB+BPwPpg+d499A711utQZ1kLy8nMYcnGJYnZeBLIysriiSee4JlnnqFz584MHTqUm2++mU8++STs0tJWU/u8urqaqVOnMmrUKHr1Ss8vSkvpQFxcujgxxw8B8oGpwDigP9GR3x+A9xPzcbHsqt7F4tLF4Xx4Kxk9ejQbNmxg3rx5nHPOOSxatIghQ4bwH//xH2GXlrbi7fNIJMIvfvELtm7dyuzZ6XsZWMxANLOjzGyBma0ws+VmdmMDbc4ws21mtjR43J6Ycve3cO3CxF6Okk10t/gM4ErgJOA9oH2wvP5dgruA3MSUssf3sHDtwsRsPIm0bduWs846i9tvv51FixZxxRVX8Jvf/Ibq6uqwS0tbsfo8EokwduxYli1bxrvvvsvhhx8ecsWJE89poggw1d0/NbOOwBIzK3b3FfXa/dnd/7nlS2zcirL6JSRYN6CWaOjlEr0EJy9YVgOsJXpSJUFa/edNAvn5+UQiESorK8nJScDhEfmBun1uZlxyySV8+eWXvPfeexx55JFhl5dQMQPR3TcCG4PnO8xsJdEYCP1f5+7I7sRsuAJ4keiIsDvRy2g2EL3U5migLTAE+DPQlehlN+8TPRGTwBNvu2sS9PMmgfLycsaMGcPEiRM54YQT6NixIyUlJdx3330MHz6cTp068f333/Ptt9+ydetWAFavXk3nzp058sgj0/4faiLE6vP27dtz0UUX8Ze//IV58+ZhZnz33XcAHHbYYbRr1y7kn6DlNelCIjPrSzQmGjrqOtTMPicaHTe7+/KDL+/AEjbNVw7Qi+hP+T3RMXInomG39zKbU4mOCt8geplOL6KX6bTwNYh1JeSMepLIzc1lyJAhPPzww6xevZqqqiry8vIYN24ct912GwBz585lwoQJ+9aZNGkSAHfccQe/+c1vwig7pcXq8/Xr1/PHP/4RgIKCgv3WnT17NuPHjw+h6sSK+zpEM8slevnx3e7+ar1lnYBad99pZucCD7v7MQ1sYzIwGaB3794Fa9euPajiM+7MwDl0JlE1jNo7dAufyIGY2Vpgc523ity9KJ514xohmlk28ArwbP0wBHD37XWev2Fm/2lmXd19c712RUARRC/MjuezDyQzIzPlJ4NtisyMzLBLEEkFmxN2YbZFb42YCax09wcbaXNk0A4zGxxst7w5BTVFVkb63Tp0INkZ2WGXIJLW4kmUU4keHfvCzJYG7/0r0BvA3R8DLgKuMbMI0SNql3gr3BPYLqsdlZFD566GdtnpdxBbJJnEc5b5A+CAN9C6+3RgeksVFa/8bvl8uO7D1v7Y0OR3yw+7BJG0ltJ3qpze5/S0nRi2vkzL5PQ+p4ddhkhaS+k0GZw3mNycBN0akmQ65HRgcN7gsMsQSWspHYgFPQuo3nNo3NJVvaeagh4FsRuKSLOldCDmdcw7ZM685mTm0LNjz7DLEElrKR2IZsapvU8Nu4xWccpRp6Tt5LAiySKlAxHgpiE3pf1xxNycXKYOnRp2GSJpL+WvbB5+9HA65nRs3ryI7wNfEL2oyIB2RK+irCY6wUPnoN15RK+6fJToZA5j6mxjDtFZbvbew3w20Ylk997JvQk4Inh+EtFJIZqoU5tODOs3rOkrikiTpHwgZlgGU4dO5fb3bm/aV5GuA74CriLaC7uAPUQncfgbsAj4lzrty4h+l8q3RAOz7kxUZwEDg/XmATfw90kg7gauaepP9Xfts9szdejUQ+byIpEwpcW/soknTWz6RLE7iE70uve/hA5Ew7AxXwAnEJ0wdlUjbXoB2xtZ1ky1XsuEEyfEbigiBy0tArFLuy78fMDPybImDHj/AdgGPEL0+5fXxGi/HDg+eHzZSJvVwID4S4glKyOLCwdcSJd26fmFPiLJJi0CEeC+s+6jTVYTJiNsQ3R3eRTR0eFLwGeNtC0lOprsTHSC2I38/fuZAYqJBusrwE+bVveBtM1qy31n3ddyGxSRA0qbQOzVqRcPn/MwHbLrfxXeAWQA/YAzgXOBlY20+5Lo7GoPAQ8DVfXankX0uOFZwB+bWnnDOmR34OGRD5PXKS92YxFpEWkTiAATT5xIYc/C+KYF28z+E5R9BxzWQLtaorvL1wC/DB5jiR5TrG8w0RMvq5tU9g9kZ2QzKG+Qjh2KtLK0CkQz49kLn6VtZtvYjauJXjIzHfhPomeRz2ig3bdAR/Y/4dInaL+jfgFEzy4f5AQ8bbLa8MzPn9GF2CKtLO6vEGhphYWFXlJSkpBtz146m+veuK5pl+EkifbZ7Zl+7nSNDkWaycyWJGzG7FQ04cQJTD55Mu2z28dunEQ6ZHfgqoKrFIYiIUnLQAR48OwHuehHF6VMKLbPbs9F+Rcx7Z+mhV2KyCErbQPRzJh1wSzG5I9J+lBsn92eMfljmHn+TB03FAlR2gYiRL+lbvYFs7mq4KqkDcX22e25uuBqZl8wW9+qJxKytA5EiI4UHzz7QaafO53cnNyk+aa+7IxscnNymX7udKadPU0jQ5EkkPaBuNeEEyewasoqTj3q1KZdvJ0AHbI7cMpRp7BqyiqdQBFJIodMIALkdcpjweULeOScR6Kjxabc+9wCsjKyyM3J5ZFzHmHB5Qt0F4pIkjmkAhGiu9ATT5rIyikruXjgxbTNakv7rMQeX2yf1Z62WW25OP9iVk1ZxcSTJmoXWSQJJccBtRD06tSLZ0c/y5bdW5i9dDYPLHqAHdU7mjfRbCNyc3LplNOJqadMZcKJEzRrjUiSS8s7VZqj1muZ/7f5TPtoGovWLaJ6TzU5mTnsrN4Z11yLGZZBbk7uvvVOOeoUpg6dyrB+wzS5q0grOpg7VQ7ZEWJ9GZbBiKNHMOLoEbg7G3ZsYMnGJSwuXczCtQtZUbaC3TW7qamtYU/tHjIzMsnOyKZddjvyu+Vzep/TGZw3mIIeBfTs2FO7xCIpSIHYADMjr1MeeZ3yOP+488MuR0RaifblREQCCkQRkYACUUQkoEAUEQkoEEVEAgpEEZGAAlFEJKBAFBEJKBBFRAIKRBGRQMxANLOjzGyBma0ws+VmdmMDbczMHjGz1Wa2zMxOTky5IiKJE8+9zBFgqrt/amYdgSVmVuzuK+q0OQc4Jnj8I/Bo8KeISMqIOUJ0943u/mnwfAewEqg/1fMFwFMe9THQ2cx6tHi1IiIJ1KTZbsysL3AS8Em9RXnAujqv1wfvbTyY4qQFaTqy8IQ056g0XdwnVcwsF3gF+H/uvr05H2Zmk82sxMxKysrKmrMJEZFYuu7NmeAxOd4V4xohmlk20TB81t1fbaBJKXBUnde9gvf24+5FQBFEZ8yOt0hpARqltD6NysOyubkzZsdzltmAmcBKd3+wkWZzgcuCs81DgG3urt1lEUkp8YwQTwUuBb4ws6XBe/8K9AZw98eAN4BzgdVABaAvGxaRlBMzEN39A+CAY3+PflPVlJYqSkQkDLpTRUQkoEAUEQkoEEVEAgpEEZGAAlFEJKBAFBEJKBBFRAIKRBGRgAJRRCSgQBQRCSgQRUQCCkQRkYACUUQkoEAUEQkoEEVEAgpEEZGAAlFEJKBAFBEJKBBFRAIKRBGRgAJRRCSgQBQRCSgQRUQCCkQRkYACUUQkoEAUEQkoEEVEAgpEEZGAAlFEJKBAFBEJKBAbUVZWxrXXXkvfvn1p06YN3bt3Z/jw4RQXFwPw7//+7wwYMIAOHTrQpUsXhg8fzqJFi0KuOrXF6vO6rrrqKsyMBx54IIRK00esPh8/fjxmtt9jyJAhIVedOFlhF5CsRo8eTUVFBTNnzqR///5s2rSJhQsXUl5eDsBxxx3HjBkz6NevH7t37+ahhx5i5MiRfP3113Tv3j3k6lNTrD7f6+WXX2bx4sX07NkzpErTRzx9PmLECJ5++ul9r3NycsIotXW4eyiPgoICT1ZbtmxxwIuLi+NeZ9u2bQ74W2+9lcDK0le8fb5mzRrv2bOnr1ixwvv06eP3339/K1XYDBB9JKl4+vzyyy/38847rxWrOnhAiTczl7TL3IDc3Fxyc3OZO3culZWVMdtXV1dTVFREp06dOPHEExNfYBqKp88jkQhjx47ltttu40c/+lErV5h+4v09/+CDDzjiiCM49thjmTRpEps2bWrFKluXArEBWVlZPPHEEzzzzDN07tyZoUOHcvPNN/PJJ5/s1+71118nNzeXtm3b8tBDD1FcXKzd5WaKp8/vuOMOunbtyjXXXBNipekjnj4fOXIkTz31FO+++y7Tpk1j8eLFDBs2jKqqqhArT6DmDi0P9pHMu8x77d69299++22/8847fejQoQ743XffvW/5zp07/euvv/aPPvrIJ06c6H369PENGzaEWHHqa6zPFyxY4D179vRNmzbta6td5pYR6/e8rtLSUs/KyvJXXnmllauMHwexyxy7AcwCNgFfNrL8DGAbsDR43B7PB6dCINZ3xRVXeHZ2tldVVTW4vH///n7XXXe1clXpbW+f33rrrW5mnpmZue8BeEZGhufl5YVdZsNSJBDri/V73rdvX7/nnntauar4HUwgxnOW+QlgOvDUAdr82d3/uenj09SSn59PJBKhsrKywTNttbW16bsrEZK9fX711Vczbty4/ZadffbZjB07lkmTJoVUXXo60O/55s2bKS0tpUePHiFVl1gxA9Hd3zezvq1QS9IoLy9nzJgxTJw4kRNOOIGOHTtSUlLCfffdx/DhwwG47bbbGDVqFD169KCsrIwZM2awfv16Lr744pCrT02x+rx3794/WCc7O5sjjzyS4447LoSKU1+sPs/IyODmm29m9OjR9OjRgzVr1nDrrbdyxBFH8POf/zzs8hOipa5DHGpmnwMbgJvdfXkLbTcUubm5DBkyhIcffpjVq1dTVVVFXl4e48aN47bbbiMrK4vly5cza9YsysvLOfzwwxk0aBDvv/8+J5xwQtjlp6RYfS4tL1afZ2Zm8sUXX/DUU0+xdetWevTowZlnnsmLL75Ix44dwy4/ISy6yx2jUXSE+Lq7H9/Ask5ArbvvNLNzgYfd/ZhGtjMZmAzQu3fvgrVr1x5M7SLJzSz6Zxz/xqTlmNlaYHOdt4rcvSiedQ96hOju2+s8f8PM/tPMurr75gbaFgFFAIWFhfotEZFE2Ozuhc1Z8aCvQzSzI82i/xWa2eBgm+UHXktEJPnEHCGa2XNEL63pambrgTuAbAB3fwy4CLjGzCLAbuASj2c/XEQkycRzlnlsjOXTiV6WIyKS0nTrnohIQIEoIhJQIIqIBBSIIiIBBaKISECBKCISUCCKiAQUiCIiAQWiiEhAgSgiElAgiogEFIgiIgEFoohIQIEoIhJQIIqIBBSIIiIBBaKISECBKCISUCCKiAQUiCIiAQWiiEhAgSgiElAgiogEFIgiIgEFoohIQIEoIhJQIIqIBBSIIiIBBaKISECBKCISUCCKiAQUiCIiAQWiiEhAgSgiElAgiogEFIgiIoGYgWhms8xsk5l92chyM7NHzGy1mS0zs5NbvkwRkcSLZ4T4BDDyAMvPAY4JHpOBRw++LBGR1hczEN39feD7AzS5AHjKoz4GOptZj5YqUESktWS1wDbygHV1Xq8P3tvYAtuWlmIW/dM93DoORXv7XpJeq55UMbPJZlZiZiVlZWWt+dEicujoujdngsfkeFdsiRFiKXBUnde9gvd+wN2LgCKAwsJCDVUkvWk0Hg6zze5e2JxVW2KEOBe4LDjbPATY5u7aXRaRlBNzhGhmzwFnEB2GrgfuALIB3P0x4A3gXGA1UAFMSFSxIiKJFDMQ3X1sjOUOTGmxikREQqI7VUREAgpEEZGAAlFEJKBAFBEJKBBFRAIKRBGRgAJRRCSgQBQRCSgQRUQCCkQRkYACUUQkoEAUEQkoEEVEAgpEEZGAAlFEJKBAFBEJKBBFRAIKRBGRgAJRRCSgQBQRCSgQRUQCCkQRkYACUUQkoEAUEQkoEEVEAgpEEZGAAlFEJKBAFBEJKBBFRAIKRBGRgAJRRCSgQGxEWVkZ1157LX379qVNmzZ0796d4cOHU1xcvK/NV199xYUXXkjnzp1p3749J598MitXrgyx6tQWq8/NrMHHlClTQq48dcXq8507d3L99dfTq1cv2rVrx3HHHcdDDz0UctWJkxV2Aclq9OjRVFRUMHPmTPr378+mTZtYuHAh5eXlAPztb3/j1FNP5bLLLmP+/Pl07tyZVatWkZubG3LlqStWn2/cuHG/9iUlJYwaNYqLL744jHLTQqw+v+mmm3jnnXd4+umn6devH++//z6TJk2ia9euXHrppSFXnwDuHsqjoKDAk9WWLVsc8OLi4kbbjB071seNG9eKVR0kiD6SVDx9Xt+VV17pxx57bAKrSm/x9PnAgQP99ttv3++90047zadMmZLo8poNKPFm5pJ2mRuQm5tLbm4uc+fOpbKy8gfLa2trmTdvHvn5+YwcOZJu3boxaNAgXnjhhRCqTQ+x+ry+nTt38vzzzzNp0qRWqC49xdPnP/3pT5k3bx7r1q0DYNGiRSxdupSRI0e2Zqmtp7lJerCPZB4huru//PLL3qVLF2/Tpo0PGTLEp06d6h9//LG7u2/cuNEBb9++vU+bNs0/++wznzZtmmdmZvrrr78ecuWNSPIRovuB+7y+xx9/3HNycnzTpk2tXGV6idXnVVVVPn78eAc8KyvLs7Ky/NFHHw2x4tg4iBGiAvEAdu/e7W+//bbfeeedPnToUAf87rvv9tLSUgd87Nix+7UfO3asjxw5MqRqY0iBQHRvvM/rKyws9DFjxoRQYfo5UJ8/8MADfuyxx/rcuXP9888/99///vfeoUMHf/PNN0OuunEJD0RgJPBXYDXw6waWjwfKgKXB48pY20yFQKzviiuu8OzsbK+qqvKsrCz/7W9/u9/yu+66y/Pz80OqLoYUCcT66vb5Xp999pkD/vbbb4dYWfra2+dbt2717Oxsf+21136wfPjw4SFVF9vBBGLMs8xmlgnMAM4C1gN/MbO57r6iXtMX3P26g9h7T3r5+flEIhEqKysZNGgQf/3rX/db/tVXX9GnT5+QqktPdfs8JycHgKKiIvr168eIESNCri497e1zM6OmpobMzMz9lmdmZlJbWxtSdQkWKzGBocCf6ry+Fbi1XpvxwPSmJHEyjxA3b97sZ555pj/99NP++eef+zfffOMvvviid+/e3UeMGOHu7nPmzPHs7Gx//PHH/euvv/aioiLPysrSMcRmiqfP3d137drlnTp18t/97nchVpse4unz008/3QcOHOgLFizwb775xmfPnu1t27b1Rx55JOTqG0cid5mBi4A/1Hl9af3wCwJxI7AMeBk4KtZ2kzkQKysr/dZbb/XCwkLv3Lmzt2vXzvv37++//OUvvby8fF+72bNn+zHHHONt27b1H//4x/5f//VfIVYdQ5IHYrx9PmvWLM/MzPTS0tIQq00P8fT5xo0bffz48d6zZ09v27atH3fccX7//fd7bW1tyNU37mAC0aLrN87MLgJGuvuVwetLgX/0OrvHZnY4sNPdq8zsKuD/uvuwBrY1GZgM0Lt374K1a9c2eUQrzWQW/TPG37dIqjOztcDmOm8VuXtRPOvGc6dKKXBUnde9gvf2cffyOi//ANzX0IaCoooACgsL9S9TRBJhs7sXNmfFeC7M/gtwjJn1M7Mc4BJgbt0GZtajzsvzAd3QKyIpJ+YI0d0jZnYd8CcgE5jl7svN7C6i++pzgRvM7HwgAnxP9JiiiEhKiXkMMVEKCwu9pKQklM8+JOkYohwizGxJIneZRUQOCQpEEZGAAlFEJKBAFBEJKBBFRAIKRJFDwP/+7/8ybtw4jj76aAoKChg6dChz5swB4IMPPmDw4MEMGDCAAQMGUFS0/00dkUiEbt268etf/3q/98844wzS7UoRBaJImnN3fvazn3HaaafxzTffsGTJEp5//nnWr1/Pd999x7hx43jsscdYtWoVH3zwAY8//jj//d//vW/94uJijj32WF566SXCukyvtSgQRdLc/PnzycnJ4eqrr973Xp8+fbj++uuZMWMG48eP5+STTwaga9eu3Hfffdxzzz372j733HPceOON9O7dm48++qjV629NCkSRNLd8+fJ9gdfQsoKCgv3eKywsZPny5QBUVlbyzjvvMGrUKMaOHctzzz2X8HrDpEAUOcRMmTKFn/zkJwwaNChm29dff50zzzyTdu3aMXr0aF577TX27NnTClWGQ4EokuYGDhzIp59+uu/1jBkzePfddykrKyM/P58lS5bs137JkiUMHDgQiO4uv/POO/Tt25eCggLKy8uZP39+q9bfmhSIImlu2LBhVFZW8uijj+57r6KiAoiOFp944gmWLl0KQHl5Ob/61a+45ZZb2L59O3/+85/59ttvWbNmDWvWrGHGjBlpvdusQBRJc2bGa6+9xsKFC+nXrx+DBw/m8ssv595776VHjx4888wzTJo0iQEDBnDKKacwceJERo0axZw5cxg2bBht2rTZt60LLriAefPmUVVVBcB5551Hr1696NWrF2PGjAnrR2wxmu3mUKHZbuQQodluRERagAJRRCSgQBQRCSgQRUQCCkQRkYACUUQkoEAUEQkoEEVEAgpEEZGAAlFEJKBAFBEJKBBFRAIKRBGRgAJRRCSgQBQRCSgQRUQCCkQRkYACUUQkoEAUEQkoEEVEAgpEEZGAAlFEJBBXIJrZSDP7q5mtNrNfN7C8jZm9ECz/xMz6tnilIiIJFjMQzSwTmAGcA+QDY80sv16zK4At7t4feAi4t6ULFRFJtHhGiIOB1e7+jbtXA88DF9RrcwHwZPD8ZWC42d5vRhcRSQ3xBGIesK7O6/XBew22cfcIsA04vCUKFBFpLVmt+WFmNhmYHLysMrMvW/PzW1BXYHPYRTRDV8xSsW5I5T5PzbohdWs/3sxK6rwucveieFaMJxBLgaPqvO4VvNdQm/VmlgUcBpTX31BQVBGAmZW4e2E8RSabVK09VeuG1K09VeuG1K39YOqOZ5f5L8AxZtbPzHKAS4C59drMBS4Pnl8EzHd3b05BIiJhiTlCdPeImV0H/AnIBGa5+3Izuwsocfe5wEzgaTNbDXxPNDRFRFJKXMcQ3f0N4I16791e53klMKaJnx3XPn2SStXaU7VuSN3aU7VuSN3am123ac9WRCRKt+6JiAQSHoipettfHHWPN7MyM1saPK4Mo876zGyWmW1q7JImi3ok+LmWmdnJrV1jY+Ko/Qwz21anz29vqF1rM7OjzGyBma0ws+VmdmMDbZKu3+OsO1n7vK2ZLTazz4Pa72ygTdOzxd0T9iB6EuZ/gKOBHOBzIL9em2uBx4LnlwAvJLKmFqx7PDA97FobqP004GTgy0aWnwu8CRgwBPgk7JqbUPsZwOth19lAXT2Ak4PnHYGvGvh9Sbp+j7PuZO1zA3KD59nAJ8CQem2anC2JHiGm6m1/8dSdlNz9faJn+htzAfCUR30MdDazHq1T3YHFUXtScveN7v5p8HwHsJIf3s2VdP0eZ91JKejHncHL7OBR/4RIk7Ml0YGYqrf9xVM3wOhg9+dlMzuqgeXJKN6fLVkNDXaT3jSzgWEXU1+wW3YS0RFLXUnd7weoG5K0z80s08yWApuAYndvtM/jzRadVGm+eUBfdz8BKObv/xNJ4nwK9HH3nwC/B14Lt5z9mVku8Arw/9x9e9j1xCtG3Unb5+6+x91PJHr33GAzO/5gt5noQGzKbX8c6La/Vhazbncvd/eq4OUfgIJWqu1gxfN3kpTcffve3SSPXhubbWZdQy4LADPLJhoqz7r7qw00Scp+j1V3Mvf5Xu6+FVgAjKy3qMnZkuhATNXb/mLWXe/4z/lEj7+kgrnAZcFZzyHANnffGHZR8TCzI/ceAzKzwUR/f8P+z5OgppnASnd/sJFmSdfv8dSdxH3ezcw6B8/bAWcBq+o1a3K2JHS2G0/R2/7irPsGMzsfiBCte3xoBddhZs8RPTPY1czWA3cQPeCMuz9G9I6jc4HVQAUwIZxKfyiO2i8CrjGzCLAbuCQJ/vMEOBW4FPgiOKYF8K9Ab0jqfo+n7mTt8x7AkxadwDoDeNHdXz/YbNGdKiIiAZ1UEREJKBBFRAIKRBGRgAJRRCSgQBQRCSgQRUQCCkQRkYACUUQk8P8BkGhN8Qedx5cAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 360x360 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 初期位置での迷路の様子\n",
    "\n",
    "# 図を描く大きさと、図の変数名を宣言\n",
    "fig = plt.figure(figsize=(5, 5))\n",
    "ax = plt.gca()\n",
    "\n",
    "# 赤い壁を描く\n",
    "plt.plot([1, 1], [0, 1], color='red', linewidth=2)\n",
    "plt.plot([1, 2], [2, 2], color='red', linewidth=2)\n",
    "plt.plot([2, 2], [2, 1], color='red', linewidth=2)\n",
    "plt.plot([2, 3], [1, 1], color='red', linewidth=2)\n",
    "\n",
    "# 状態を示す文字S0～S8を描く\n",
    "plt.text(0.5, 2.5, 'S0', size=14, ha='center')\n",
    "plt.text(1.5, 2.5, 'S1', size=14, ha='center')\n",
    "plt.text(2.5, 2.5, 'S2', size=14, ha='center')\n",
    "plt.text(0.5, 1.5, 'S3', size=14, ha='center')\n",
    "plt.text(1.5, 1.5, 'S4', size=14, ha='center')\n",
    "plt.text(2.5, 1.5, 'S5', size=14, ha='center')\n",
    "plt.text(0.5, 0.5, 'S6', size=14, ha='center')\n",
    "plt.text(1.5, 0.5, 'S7', size=14, ha='center')\n",
    "plt.text(2.5, 0.5, 'S8', size=14, ha='center')\n",
    "plt.text(0.5, 2.3, 'START', ha='center')\n",
    "plt.text(2.5, 0.3, 'GOAL', ha='center')\n",
    "\n",
    "# 描画範囲の設定と目盛りを消す設定\n",
    "ax.set_xlim(0, 3)\n",
    "ax.set_ylim(0, 3)\n",
    "plt.tick_params(axis='both', which='both', bottom='off', top='off',\n",
    "                labelbottom='off', right='off', left='off', labelleft='off')\n",
    "\n",
    "# 現在地S0に緑丸を描画する\n",
    "line, = ax.plot([0.5], [2.5], marker=\"o\", color='g', markersize=60)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 初期の方策を決定するパラメータtheta_0を設定\n",
    "\n",
    "# 行は状態0～7、列は移動方向で↑、→、↓、←を表す\n",
    "theta_0 = np.array([[np.nan, 1, 1, np.nan],  # s0\n",
    "                    [np.nan, 1, np.nan, 1],  # s1\n",
    "                    [np.nan, np.nan, 1, 1],  # s2\n",
    "                    [1, 1, 1, np.nan],  # s3\n",
    "                    [np.nan, np.nan, 1, 1],  # s4\n",
    "                    [1, np.nan, np.nan, np.nan],  # s5\n",
    "                    [1, np.nan, np.nan, np.nan],  # s6\n",
    "                    [1, 1, np.nan, np.nan],  # s7、※s8はゴールなので、方策はなし\n",
    "                    ])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 方策パラメータtheta_0をランダム方策piに変換する関数の定義\n",
    "\n",
    "\n",
    "def simple_convert_into_pi_from_theta(theta):\n",
    "    '''単純に割合を計算する'''\n",
    "\n",
    "    [m, n] = theta.shape  # thetaの行列サイズを取得\n",
    "    pi = np.zeros((m, n))\n",
    "    for i in range(0, m):\n",
    "        pi[i, :] = theta[i, :] / np.nansum(theta[i, :])  # 割合の計算\n",
    "\n",
    "    pi = np.nan_to_num(pi)  # nanを0に変換\n",
    "\n",
    "    return pi\n",
    "\n",
    "# ランダム行動方策pi_0を求める\n",
    "pi_0 = simple_convert_into_pi_from_theta(theta_0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Q: [[       nan 0.08560481 0.0225177         nan]\n",
      " [       nan 0.07030749        nan 0.09457712]\n",
      " [       nan        nan 0.00056941 0.04161894]\n",
      " [0.00735845 0.09829591 0.06016075        nan]\n",
      " [       nan        nan 0.00996884 0.02289049]\n",
      " [0.04022925        nan        nan        nan]\n",
      " [0.05309568        nan        nan        nan]\n",
      " [0.02735694 0.05136969        nan        nan]]\n",
      "0.0856048144089564\n"
     ]
    }
   ],
   "source": [
    "# 初期の行動価値関数Qを設定\n",
    "\n",
    "[a, b] = theta_0.shape  # 行と列の数をa, bに格納\n",
    "Q = np.random.rand(a, b) * theta_0 * 0.1\n",
    "# *theta0をすることで要素ごとに掛け算をし、Qの壁方向の値がnanになる\n",
    "print(\"Q:\",Q)\n",
    "print(np.nanmax(Q[0,:]))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ε-greedy法を実装\n",
    "\n",
    "\n",
    "def get_action(s, Q, epsilon, pi_0):\n",
    "    direction = [\"up\", \"right\", \"down\", \"left\"]\n",
    "\n",
    "    # 行動を決める\n",
    "    if np.random.rand() < epsilon:\n",
    "        # εの確率でランダムに動く\n",
    "        next_direction = np.random.choice(direction, p=pi_0[s, :])\n",
    "    else:\n",
    "        # Qの最大値の行動を採用する\n",
    "        next_direction = direction[np.nanargmax(Q[s, :])]\n",
    "\n",
    "    # 行動をindexに\n",
    "    if next_direction == \"up\":\n",
    "        action = 0\n",
    "    elif next_direction == \"right\":\n",
    "        action = 1\n",
    "    elif next_direction == \"down\":\n",
    "        action = 2\n",
    "    elif next_direction == \"left\":\n",
    "        action = 3\n",
    "\n",
    "    return action\n",
    "\n",
    "\n",
    "def get_s_next(s, a, Q, epsilon, pi_0):\n",
    "    direction = [\"up\", \"right\", \"down\", \"left\"]\n",
    "    next_direction = direction[a]  # 行動aの方向\n",
    "\n",
    "    # 行動から次の状態を決める\n",
    "    if next_direction == \"up\":\n",
    "        s_next = s - 3  # 上に移動するときは状態の数字が3小さくなる\n",
    "    elif next_direction == \"right\":\n",
    "        s_next = s + 1  # 右に移動するときは状態の数字が1大きくなる\n",
    "    elif next_direction == \"down\":\n",
    "        s_next = s + 3  # 下に移動するときは状態の数字が3大きくなる\n",
    "    elif next_direction == \"left\":\n",
    "        s_next = s - 1  # 左に移動するときは状態の数字が1小さくなる\n",
    "\n",
    "    return s_next\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Q学習による行動価値関数Qの更新\n",
    "\n",
    "\n",
    "def Q_learning(s, a, r, s_next, Q, eta, gamma):\n",
    "\n",
    "    if s_next == 8:  # ゴールした場合\n",
    "        Q[s, a] = Q[s, a] + eta * (r - Q[s, a])\n",
    "\n",
    "    else:\n",
    "        Q[s, a] = Q[s, a] + eta * (r + gamma * np.nanmax(Q[s_next,: ]) - Q[s, a])\n",
    "\n",
    "    return Q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Q学習で迷路を解く関数の定義、状態と行動の履歴および更新したQを出力\n",
    "\n",
    "\n",
    "def goal_maze_ret_s_a_Q(Q, epsilon, eta, gamma, pi):\n",
    "    s = 0  # スタート地点\n",
    "    a = a_next = get_action(s, Q, epsilon, pi)  # 初期の行動\n",
    "    s_a_history = [[0, np.nan]]  # エージェントの移動を記録するリスト\n",
    "\n",
    "    while (1):  # ゴールするまでループ\n",
    "        a = a_next  # 行動更新\n",
    "\n",
    "        s_a_history[-1][1] = a\n",
    "        # 現在の状態（つまり一番最後なのでindex=-1）に行動を代入\n",
    "\n",
    "        s_next = get_s_next(s, a, Q, epsilon, pi)\n",
    "        # 次の状態を格納\n",
    "\n",
    "        s_a_history.append([s_next, np.nan])\n",
    "        # 次の状態を代入。行動はまだ分からないのでnanにしておく\n",
    "\n",
    "        # 報酬を与え,　次の行動を求めます\n",
    "        if s_next == 8:\n",
    "            r = 1  # ゴールにたどり着いたなら報酬を与える\n",
    "            a_next = np.nan\n",
    "        else:\n",
    "            r = 0\n",
    "            a_next = get_action(s_next, Q, epsilon, pi)\n",
    "            # 次の行動a_nextを求めます。\n",
    "\n",
    "        # 価値関数を更新\n",
    "        Q = Q_learning(s, a, r, s_next, Q, eta, gamma)\n",
    "\n",
    "        # 終了判定\n",
    "        if s_next == 8:  # ゴール地点なら終了\n",
    "            break\n",
    "        else:\n",
    "            s = s_next\n",
    "\n",
    "    return [s_a_history, Q]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "エピソード:1\n",
      "0.1554169942026278\n",
      "迷路を解くのにかかったステップ数は50です\n",
      "エピソード:2\n",
      "0.15134954457495184\n",
      "迷路を解くのにかかったステップ数は56です\n",
      "エピソード:3\n",
      "0.1969391836132622\n",
      "迷路を解くのにかかったステップ数は430です\n",
      "エピソード:4\n",
      "0.09584172179114517\n",
      "迷路を解くのにかかったステップ数は14です\n",
      "エピソード:5\n",
      "0.09567998005212376\n",
      "迷路を解くのにかかったステップ数は16です\n",
      "エピソード:6\n",
      "0.09336075392417753\n",
      "迷路を解くのにかかったステップ数は12です\n",
      "エピソード:7\n",
      "0.0906738087493114\n",
      "迷路を解くのにかかったステップ数は8です\n",
      "エピソード:8\n",
      "0.08913405969939943\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:9\n",
      "0.08817170403632607\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:10\n",
      "0.08714931027214809\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:11\n",
      "0.08605770314114251\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:12\n",
      "0.08488991578662411\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:13\n",
      "0.08364119330124478\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:14\n",
      "0.08230890505150294\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:15\n",
      "0.08089239414425348\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:16\n",
      "0.07939278628783385\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:17\n",
      "0.07781277530036575\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:18\n",
      "0.07615639844279644\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:19\n",
      "0.07442881145156811\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:20\n",
      "0.0726360704860736\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:21\n",
      "0.07078492607986811\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:22\n",
      "0.06888263249966167\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:23\n",
      "0.06693677459463956\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:24\n",
      "0.06495511319532907\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:25\n",
      "0.0629454493412489\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:26\n",
      "0.060915507034136895\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:27\n",
      "0.05887283379046046\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:28\n",
      "0.05682471797137856\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:29\n",
      "0.05477812167407414\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:30\n",
      "0.052739627853653714\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:31\n",
      "0.050715400291770796\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:32\n",
      "0.04871115502219697\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:33\n",
      "0.04673214185279739\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:34\n",
      "0.044783134678214376\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:35\n",
      "0.042868429350272075\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:36\n",
      "0.040991847957551786\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:37\n",
      "0.03915674845691619\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:38\n",
      "0.037366038694214176\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:39\n",
      "0.035622193946065306\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:40\n",
      "0.0339272772073726\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:41\n",
      "0.03228296153840948\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:42\n",
      "0.030690553869895998\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:43\n",
      "0.029151019743640094\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:44\n",
      "0.02766500853965531\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:45\n",
      "0.026232878807934068\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:46\n",
      "0.024854723384253985\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:47\n",
      "0.023530394024586898\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:48\n",
      "0.022259525342106956\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:49\n",
      "0.021041557874691308\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:50\n",
      "0.019875760149535293\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:51\n",
      "0.01876124964537995\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:52\n",
      "0.017697012582269744\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:53\n",
      "0.01668192249408451\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:54\n",
      "0.01571475756069074\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:55\n",
      "0.014794216694801077\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:56\n",
      "0.013918934393849947\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:57\n",
      "0.013087494379732334\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:58\n",
      "0.012298442059395787\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:59\n",
      "0.011550295847333691\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:60\n",
      "0.010841557397231916\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:61\n",
      "0.01017072079464909\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:62\n",
      "0.009536280765845118\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:63\n",
      "0.008936739959940043\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:64\n",
      "0.008370615362650602\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:65\n",
      "0.007836443900067769\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:66\n",
      "0.007332787290476928\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:67\n",
      "0.006858236201159551\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:68\n",
      "0.0064114137656196\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:69\n",
      "0.00599097851479069\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:70\n",
      "0.005595626773635121\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:71\n",
      "0.0052240945721696574\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:72\n",
      "0.0048751591174504005\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:73\n",
      "0.004547639870443154\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:74\n",
      "0.00424039926905051\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:75\n",
      "0.0039523431359134165\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:76\n",
      "0.0036824208069559106\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:77\n",
      "0.003429625014055415\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:78\n",
      "0.0031929915526930364\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:79\n",
      "0.0029715987629970186\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:80\n",
      "0.0027645668502420673\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:81\n",
      "0.0025710570686375833\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:82\n",
      "0.0023902707900959053\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:83\n",
      "0.0022214484776704735\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:84\n",
      "0.0020638685814508007\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:85\n",
      "0.0019168463729311025\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:86\n",
      "0.0017797327322057743\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:87\n",
      "0.0016519129008071287\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:88\n",
      "0.0015328052115627377\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:89\n",
      "0.0014218598055344422\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:90\n",
      "0.0013185573448836196\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:91\n",
      "0.0012224077293800928\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:92\n",
      "0.001132948823266755\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:93\n",
      "0.0010497451982417472\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:94\n",
      "0.0009723868974889083\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:95\n",
      "0.0009004882249040724\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:96\n",
      "0.0008336865629867685\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:97\n",
      "0.0007716412222293911\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:98\n",
      "0.0007140323242881275\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:99\n",
      "0.0006605597207121061\n",
      "迷路を解くのにかかったステップ数は4です\n",
      "エピソード:100\n",
      "0.0006109419485686995\n",
      "迷路を解くのにかかったステップ数は4です\n"
     ]
    }
   ],
   "source": [
    "# Q学習で迷路を解く\n",
    "\n",
    "eta = 0.1  # 学習率\n",
    "gamma = 0.9  # 時間割引率\n",
    "epsilon = 0.5  # ε-greedy法の初期値\n",
    "v = np.nanmax(Q, axis=1)  # 状態ごとに価値の最大値を求める\n",
    "is_continue = True\n",
    "episode = 1\n",
    "\n",
    "V = []  # エピソードごとの状態価値を格納する\n",
    "V.append(np.nanmax(Q, axis=1))  # 状態ごとに行動価値の最大値を求める\n",
    "\n",
    "while is_continue:  # is_continueがFalseになるまで繰り返す\n",
    "    print(\"episode:\" + str(episode))\n",
    "\n",
    "    # ε-greedyの値を少しずつ小さくする\n",
    "    epsilon = epsilon / 2\n",
    "\n",
    "    # Q学習で迷路を解き、移動した履歴と更新したQを求める\n",
    "    [s_a_history, Q] = goal_maze_ret_s_a_Q(Q, epsilon, eta, gamma, pi_0)\n",
    "\n",
    "    # 状態価値の変化\n",
    "    new_v = np.nanmax(Q, axis=1)  # 状態ごとに行動価値の最大値を求める\n",
    "    print(np.sum(np.abs(new_v - v)))  # 状態価値関数の変化を出力\n",
    "    v = new_v\n",
    "    V.append(v)  # このエピソード終了時の状態価値関数を追加\n",
    "\n",
    "    print(\"所需步数：\" + str(len(s_a_history) - 1))\n",
    "\n",
    "    # 100エピソード繰り返す\n",
    "    episode = episode + 1\n",
    "    if episode > 100:\n",
    "        break\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 状態価値の変化を可視化します\n",
    "# 参考URL http://louistiao.me/posts/notebooks/embedding-matplotlib-animations-in-jupyter-notebooks/\n",
    "from matplotlib import animation\n",
    "from IPython.display import HTML\n",
    "import matplotlib.cm as cm  # color map\n",
    "\n",
    "\n",
    "def init():\n",
    "    # 背景画像の初期化\n",
    "    line.set_data([], [])\n",
    "    return (line,)\n",
    "\n",
    "\n",
    "def animate(i):\n",
    "    # フレームごとの描画内容\n",
    "    # 各マスに状態価値の大きさに基づく色付きの四角を描画\n",
    "    line, = ax.plot([0.5], [2.5], marker=\"s\",\n",
    "                    color=cm.jet(V[i][0]), markersize=85)  # S0\n",
    "    line, = ax.plot([1.5], [2.5], marker=\"s\",\n",
    "                    color=cm.jet(V[i][1]), markersize=85)  # S1\n",
    "    line, = ax.plot([2.5], [2.5], marker=\"s\",\n",
    "                    color=cm.jet(V[i][2]), markersize=85)  # S2\n",
    "    line, = ax.plot([0.5], [1.5], marker=\"s\",\n",
    "                    color=cm.jet(V[i][3]), markersize=85)  # S3\n",
    "    line, = ax.plot([1.5], [1.5], marker=\"s\",\n",
    "                    color=cm.jet(V[i][4]), markersize=85)  # S4\n",
    "    line, = ax.plot([2.5], [1.5], marker=\"s\",\n",
    "                    color=cm.jet(V[i][5]), markersize=85)  # S5\n",
    "    line, = ax.plot([0.5], [0.5], marker=\"s\",\n",
    "                    color=cm.jet(V[i][6]), markersize=85)  # S6\n",
    "    line, = ax.plot([1.5], [0.5], marker=\"s\",\n",
    "                    color=cm.jet(V[i][7]), markersize=85)  # S7\n",
    "    line, = ax.plot([2.5], [0.5], marker=\"s\",\n",
    "                    color=cm.jet(1.0), markersize=85)  # S8\n",
    "    return (line,)\n",
    "\n",
    "\n",
    "#　初期化関数とフレームごとの描画関数を用いて動画を作成\n",
    "anim = animation.FuncAnimation(\n",
    "    fig, animate, init_func=init, frames=len(V), interval=200, repeat=False)\n",
    "\n",
    "HTML(anim.to_jshtml())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
